In [1]:
import sys
import os
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.svm import LinearSVC

# Put the directory named by the NOVA_HOME environment variable on the import
# path (presumably so the project-local imports further down in this cell can
# be resolved -- confirm against the repository layout).
nova_home = os.getenv('NOVA_HOME')
print('NOVA_HOME is at', nova_home)
if nova_home is None:
    # os.getenv returns None when the variable is unset. Never insert None into
    # sys.path: the import system silently ignores non-string entries, so the
    # misconfiguration would only surface later as an unrelated import error.
    print('WARNING: NOVA_HOME is not set; project-local imports may fail.')
else:
    # Index 1 keeps the first sys.path entry in front of the project root.
    sys.path.insert(1, nova_home)
# IPython autoreload extension; per the IPython docs, mode 2 reloads all modules
# before executing each cell, so edits to imported .py files take effect without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

from utils import *
from manuscript.plot_config import PlotConfig
NOVA_HOME is at /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA
NOVA_HOME: /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA
In [9]:
 
In [2]:
# Plot configuration object; only its COLOR_MAPPINGS_MARKERS table is used here.
cfg = PlotConfig()
color_mappings = cfg.COLOR_MAPPINGS_MARKERS

# label_map: for every key of the mapping table, keep just the "alias" field of
# its entry (the per-class metrics in the output below are reported under these
# alias names).
label_map = {
    name: props["alias"]
    for name, props in color_mappings.items()
}
In [3]:
# Keyword arguments shared by every run_baseline_model call in this notebook.
common = dict(
    batches=[1, 2, 3],
    classifier_class=LinearSVC,
    # Presumably forwarded to classifier_class(...) by run_baseline_model -- confirm.
    # `dual` is pinned to the value that is the current LinearSVC default: the
    # captured output shows sklearn's FutureWarning that this default changes to
    # 'auto' in 1.5, which would silently switch the solver formulation on
    # upgrade. Pinning keeps today's behaviour and removes the warning.
    # NOTE(review): the Cytoself run logs a liblinear ConvergenceWarning at
    # max_iter=1000; max_iter is left untouched because changing it would alter
    # the reported metrics -- decide deliberately.
    classifier_kwargs={"C": 1.0, "max_iter": 1000, "random_state": 42, "dual": True},
    train_each_as_singleton=True,  # Train on one batch, test on the rest
    label_map=label_map,
)
In [10]:
# Dataset settings for the first baseline run: the folder the embeddings are
# read from, the multiplexed flag, and the template / directory used to locate
# the per-batch dataset config ("{batch}" is left as a placeholder here).
dataset_config = dict(
    path_to_embeddings="/home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen",
    multiplexed=False,
    config_fmt="NIH_UMAP1_DatasetConfig_B{batch}",
    config_dir="manuscript/manuscript_figures_data_config",
)
In [11]:
# Run the baseline classifier on the embeddings described by dataset_config,
# using the shared settings in `common`. Kept as the cell's last expression so
# the returned metrics are displayed as the cell output.
run_baseline_model(dataset_config=dataset_config, **common)
2025-09-17 11:45:03 INFO: [load_embeddings] multiplex=False
2025-09-17 11:45:03 INFO: [load_embeddings] experiment_type = NIH
2025-09-17 11:45:03 INFO: [load_embeddings] input_folders = ['batch1']
2025-09-17 11:45:03 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
Loading all batches...
2025-09-17 11:45:05 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-09-17 11:45:06 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-09-17 11:45:07 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-09-17 11:45:07 INFO: [load_embeddings] embeddings shape: (115587, 192)
2025-09-17 11:45:07 INFO: [load_embeddings] labels shape: (115587,)
2025-09-17 11:45:07 INFO: [load_embeddings] example label: DAPI_WT_Untreated
2025-09-17 11:45:07 INFO: [load_embeddings] paths shape: (115587,)
2025-09-17 11:45:07 INFO: [load_embeddings] multiplex=False
2025-09-17 11:45:07 INFO: [load_embeddings] experiment_type = NIH
2025-09-17 11:45:07 INFO: [load_embeddings] input_folders = ['batch2']
2025-09-17 11:45:07 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-09-17 11:45:10 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-09-17 11:45:10 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-09-17 11:45:11 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-09-17 11:45:11 INFO: [load_embeddings] embeddings shape: (94059, 192)
2025-09-17 11:45:11 INFO: [load_embeddings] labels shape: (94059,)
2025-09-17 11:45:11 INFO: [load_embeddings] example label: DCP1A_WT_Untreated
2025-09-17 11:45:11 INFO: [load_embeddings] paths shape: (94059,)
2025-09-17 11:45:11 INFO: [load_embeddings] multiplex=False
2025-09-17 11:45:11 INFO: [load_embeddings] experiment_type = NIH
2025-09-17 11:45:11 INFO: [load_embeddings] input_folders = ['batch3']
2025-09-17 11:45:11 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
2025-09-17 11:45:13 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-09-17 11:45:14 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-09-17 11:45:14 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-09-17 11:45:15 INFO: [load_embeddings] embeddings shape: (87130, 192)
2025-09-17 11:45:15 INFO: [load_embeddings] labels shape: (87130,)
2025-09-17 11:45:15 INFO: [load_embeddings] example label: TUJ1_WT_Untreated
2025-09-17 11:45:15 INFO: [load_embeddings] paths shape: (87130,)
Batches loaded.
Training on Batches: [1], Testing on: [2, 3].

=== Fold (test=[2, 3]) ===
Train: (115587, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25]
Test: (181189, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25]
Nucleus: 30428
Microtubule: 22601
Lysosome: 3067
P-Bodies: 2364
TDP-43 granules: 2534
Paraspeckles: 2622
Presynapse: 2454
Coated vesicles: 2439
Peroxisome: 2505
PURA granules: 2712
Stress granules: 2842
Actin Cytoskeleton: 2219
NEMO granules: 2935
Autophagosomes: 2651
PML bodies: 2297
Golgi: 2371
ER: 3056
Transport machinery: 2622
FMRP granules: 2913
Nucleolus: 2709
MOM: 2363
hnRNP complex: 2728
Mitochondria: 2728
TIA1 granules: 2712
Postsynapse: 2101
ANXA11 granules: 2614
/home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/sklearn/svm/_classes.py:32: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning.
  warnings.warn(
=== Evaluation Metrics ===
              Label  Accuracy  Sensitivity  Specificity      PPV      NPV       F1  Correct/Total Accuracy
    ANXA11 granules  0.996335     0.932796     0.997760 0.903242 0.998492 0.917781                     NaN
 Actin Cytoskeleton  0.997687     0.913210     0.999678 0.985256 0.997959 0.947866                     NaN
     Autophagosomes  0.988885     0.563735     0.996377 0.732809 0.992342 0.637248                     NaN
    Coated vesicles  0.994867     0.811572     0.999621 0.982294 0.995135 0.888809                     NaN
                 ER  0.998162     0.964861     0.998993 0.959856 0.999123 0.962352                     NaN
      FMRP granules  0.991324     0.792927     0.995917 0.818067 0.995209 0.805301                     NaN
              Golgi  0.999023     0.980964     0.999457 0.977518 0.999542 0.979238                     NaN
           Lysosome  0.997715     0.952215     0.998896 0.957227 0.998760 0.954715                     NaN
                MOM  0.998697     0.976356     0.999242 0.969167 0.999423 0.972748                     NaN
        Microtubule  0.993035     0.991257     0.993461 0.973184 0.997897 0.982137                     NaN
       Mitochondria  0.998852     0.978065     0.999361 0.974017 0.999463 0.976037                     NaN
      NEMO granules  0.997218     0.966719     0.997991 0.924194 0.999156 0.944978                     NaN
          Nucleolus  0.999299     0.994666     0.999412 0.976326 0.999870 0.985411                     NaN
            Nucleus  0.999349     1.000000     0.999118 0.997518 1.000000 0.998757                     NaN
           P-Bodies  0.991931     0.818246     0.996073 0.832449 0.995668 0.825287                     NaN
         PML bodies  0.993460     0.727695     0.998605 0.909884 0.994749 0.808655                     NaN
      PURA granules  0.997285     0.987066     0.997526 0.903926 0.999694 0.943668                     NaN
       Paraspeckles  0.997842     0.945460     0.998924 0.947786 0.998873 0.946621                     NaN
         Peroxisome  0.997997     0.937643     0.999334 0.968964 0.998619 0.953046                     NaN
        Postsynapse  0.988658     0.805603     0.992300 0.675445 0.996118 0.734804                     NaN
         Presynapse  0.994464     0.973545     0.994910 0.802967 0.999434 0.880067                     NaN
    Stress granules  0.998438     0.976554     0.999014 0.963089 0.999382 0.969775                     NaN
    TDP-43 granules  0.997003     0.932361     0.998377 0.924270 0.998562 0.928298                     NaN
      TIA1 granules  0.995684     0.951249     0.996729 0.872467 0.998851 0.910156                     NaN
Transport machinery  0.995452     0.855670     0.998981 0.954977 0.996366 0.902600                     NaN
      hnRNP complex  0.997798     0.921727     0.999661 0.985192 0.998086 0.952404                     NaN
      Macro Average  0.996018     0.909699     0.997912 0.918157 0.997953 0.911875                0.948231
Plotting confusion matrix with 26 classes, fig_size=46.8, font_size=23
Training on Batches: [2], Testing on: [1, 3].

=== Fold (test=[1, 3]) ===
Train: (94059, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25]
Test: (202717, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25]
P-Bodies: 2319
Autophagosomes: 1654
ANXA11 granules: 2123
Microtubule: 18531
Nucleus: 24823
ER: 2079
NEMO granules: 2360
Presynapse: 1923
MOM: 2114
Transport machinery: 2104
Actin Cytoskeleton: 2019
Peroxisome: 2074
Golgi: 2110
Mitochondria: 2236
Nucleolus: 2227
Coated vesicles: 2536
FMRP granules: 2608
Postsynapse: 1631
PML bodies: 1818
Stress granules: 2265
TDP-43 granules: 1934
Paraspeckles: 1916
PURA granules: 2090
TIA1 granules: 2086
hnRNP complex: 2236
Lysosome: 2243
/home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/sklearn/svm/_classes.py:32: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning.
  warnings.warn(
=== Evaluation Metrics ===
              Label  Accuracy  Sensitivity  Specificity      PPV      NPV       F1  Correct/Total Accuracy
    ANXA11 granules  0.996971     0.925851     0.998573 0.935915 0.998331 0.930856                     NaN
 Actin Cytoskeleton  0.997820     0.973004     0.998366 0.929211 0.999404 0.950603                     NaN
     Autophagosomes  0.988575     0.546312     0.997784 0.836977 0.990621 0.661106                     NaN
    Coated vesicles  0.994367     0.778720     0.999243 0.958803 0.995017 0.859429                     NaN
                 ER  0.997977     0.964180     0.998900 0.959904 0.999022 0.962037                     NaN
      FMRP granules  0.991402     0.732804     0.997146 0.850817 0.994083 0.787413                     NaN
              Golgi  0.998782     0.985607     0.999082 0.960717 0.999672 0.973003                     NaN
           Lysosome  0.997716     0.954318     0.998905 0.959821 0.998748 0.957062                     NaN
                MOM  0.999063     0.985536     0.999374 0.973166 0.999667 0.979312                     NaN
        Microtubule  0.993745     0.992654     0.994005 0.975329 0.998239 0.983916                     NaN
       Mitochondria  0.999176     0.975534     0.999752 0.989693 0.999404 0.982562                     NaN
      NEMO granules  0.996063     0.988124     0.996266 0.871204 0.999695 0.925988                     NaN
          Nucleolus  0.999329     0.986859     0.999631 0.984804 0.999682 0.985830                     NaN
            Nucleus  0.999640     1.000000     0.999512 0.998625 1.000000 0.999312                     NaN
           P-Bodies  0.992857     0.873623     0.995420 0.803883 0.997279 0.837303                     NaN
         PML bodies  0.993326     0.788265     0.997369 0.855245 0.995831 0.820390                     NaN
      PURA granules  0.998372     0.981447     0.998782 0.951303 0.999550 0.966140                     NaN
       Paraspeckles  0.998219     0.961125     0.999037 0.956532 0.999143 0.958823                     NaN
         Peroxisome  0.997760     0.938761     0.999057 0.956308 0.998654 0.947454                     NaN
        Postsynapse  0.988807     0.774725     0.993121 0.694115 0.995450 0.732208                     NaN
         Presynapse  0.995319     0.915797     0.997046 0.870754 0.998168 0.892708                     NaN
    Stress granules  0.998722     0.977038     0.999296 0.973499 0.999392 0.975265                     NaN
    TDP-43 granules  0.997479     0.942334     0.998694 0.940827 0.998729 0.941580                     NaN
      TIA1 granules  0.995733     0.936534     0.997166 0.888845 0.998462 0.912067                     NaN
Transport machinery  0.995521     0.918072     0.997471 0.901420 0.997936 0.909670                     NaN
      hnRNP complex  0.998343     0.960813     0.999257 0.969253 0.999045 0.965015                     NaN
      Macro Average  0.996196     0.913771     0.998010 0.921037 0.998047 0.915271                0.950542
Plotting confusion matrix with 26 classes, fig_size=46.8, font_size=23
Training on Batches: [3], Testing on: [1, 2].

=== Fold (test=[1, 2]) ===
Train: (87130, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25]
Test: (209646, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25]
Microtubule: 16469
Transport machinery: 2358
ER: 2332
Nucleus: 22599
PML bodies: 1623
TIA1 granules: 2078
Paraspeckles: 1751
Coated vesicles: 2044
PURA granules: 2085
FMRP granules: 1492
Postsynapse: 1903
Lysosome: 2340
Peroxisome: 1855
TDP-43 granules: 1836
ANXA11 granules: 1850
NEMO granules: 2117
Actin Cytoskeleton: 2152
Presynapse: 1857
P-Bodies: 1901
MOM: 2200
hnRNP complex: 2095
Nucleolus: 2085
Autophagosomes: 1484
Stress granules: 2384
Golgi: 2145
Mitochondria: 2095
/home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/sklearn/svm/_classes.py:32: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning.
  warnings.warn(
=== Evaluation Metrics ===
              Label  Accuracy  Sensitivity  Specificity      PPV      NPV       F1  Correct/Total Accuracy
    ANXA11 granules  0.996995     0.963057     0.997780 0.909308 0.999145 0.935411                     NaN
 Actin Cytoskeleton  0.997615     0.971213     0.998160 0.915888 0.999405 0.942739                     NaN
     Autophagosomes  0.978340     0.300116     0.992559 0.458156 0.985432 0.362667                     NaN
    Coated vesicles  0.997601     0.964623     0.998402 0.936208 0.999139 0.950203                     NaN
                 ER  0.996427     0.964752     0.997223 0.897139 0.999113 0.929718                     NaN
      FMRP granules  0.981688     0.425648     0.996727 0.778661 0.984654 0.550416                     NaN
              Golgi  0.998898     0.987280     0.999152 0.962157 0.999722 0.974557                     NaN
           Lysosome  0.997057     0.933710     0.998703 0.949263 0.998278 0.941422                     NaN
                MOM  0.998979     0.988832     0.999201 0.964278 0.999756 0.976401                     NaN
        Microtubule  0.997529     0.991564     0.998985 0.995825 0.997943 0.993690                     NaN
       Mitochondria  0.999175     0.985898     0.999497 0.979388 0.999658 0.982632                     NaN
      NEMO granules  0.996742     0.901794     0.999202 0.966991 0.997460 0.933255                     NaN
          Nucleolus  0.999523     0.985616     0.999858 0.994074 0.999653 0.989827                     NaN
            Nucleus  0.999948     1.000000     0.999929 0.999801 1.000000 0.999900                     NaN
           P-Bodies  0.987031     0.862268     0.989881 0.660668 0.996831 0.748124                     NaN
         PML bodies  0.992712     0.842041     0.995728 0.797836 0.996834 0.819343                     NaN
      PURA granules  0.998631     0.954602     0.999663 0.985171 0.998937 0.969646                     NaN
       Paraspeckles  0.998178     0.958131     0.999064 0.957709 0.999074 0.957920                     NaN
         Peroxisome  0.996847     0.963748     0.997586 0.899144 0.999189 0.930326                     NaN
        Postsynapse  0.990241     0.562433     0.997994 0.835589 0.992116 0.672325                     NaN
         Presynapse  0.995049     0.820425     0.998772 0.934426 0.996181 0.873723                     NaN
    Stress granules  0.996623     0.986098     0.996886 0.887714 0.999652 0.934323                     NaN
    TDP-43 granules  0.996027     0.948075     0.997071 0.875749 0.998867 0.910478                     NaN
      TIA1 granules  0.996356     0.897249     0.998677 0.940778 0.997596 0.918498                     NaN
Transport machinery  0.982876     0.918959     0.984350 0.575232 0.998105 0.707559                     NaN
      hnRNP complex  0.997381     0.912369     0.999443 0.975447 0.997878 0.942854                     NaN
      Macro Average  0.994787     0.884250     0.997327 0.885869 0.997331 0.878768                0.932233
Plotting confusion matrix with 26 classes, fig_size=46.8, font_size=23
=== Evaluation Metrics (from aggregated confusion) ===
              Label  Accuracy  Sensitivity  Specificity      PPV      NPV       F1  Correct/Total Accuracy
    ANXA11 granules  0.996785     0.941324     0.998044 0.916150 0.998667 0.928566                     NaN
 Actin Cytoskeleton  0.997707     0.952895     0.998693 0.941331 0.998963 0.947078                     NaN
     Autophagosomes  0.985054     0.459492     0.995510 0.670616 0.989314 0.545333                     NaN
    Coated vesicles  0.995662     0.855321     0.999061 0.956657 0.996504 0.903155                     NaN
                 ER  0.997486     0.964577     0.998336 0.937337 0.999085 0.950762                     NaN
      FMRP granules  0.987947     0.629474     0.996623 0.818561 0.991082 0.711672                     NaN
              Golgi  0.998896     0.984682     0.999221 0.966521 0.999650 0.975517                     NaN
           Lysosome  0.997483     0.946536     0.998831 0.955403 0.998586 0.950949                     NaN
                MOM  0.998922     0.983675     0.999273 0.968874 0.999624 0.976219                     NaN
        Microtubule  0.994865     0.991840     0.995593 0.981885 0.998030 0.986838                     NaN
       Mitochondria  0.999077     0.979955     0.999543 0.981206 0.999512 0.980580                     NaN
      NEMO granules  0.996656     0.950823     0.997830 0.918181 0.998739 0.934217                     NaN
          Nucleolus  0.999388     0.988819     0.999645 0.985381 0.999729 0.987097                     NaN
            Nucleus  0.999660     1.000000     0.999539 0.998704 1.000000 0.999352                     NaN
           P-Bodies  0.990516     0.851838     0.993663 0.753072 0.996628 0.799416                     NaN
         PML bodies  0.993150     0.789387     0.997167 0.846003 0.995853 0.816715                     NaN
      PURA granules  0.998132     0.973791     0.998710 0.947179 0.999377 0.960301                     NaN
       Paraspeckles  0.998089     0.955478     0.999012 0.954416 0.999036 0.954946                     NaN
         Peroxisome  0.997510     0.947311     0.998622 0.938414 0.998832 0.942842                     NaN
        Postsynapse  0.989268     0.714108     0.994594 0.718828 0.994467 0.716460                     NaN
         Presynapse  0.994963     0.899824     0.997004 0.865664 0.997849 0.882413                     NaN
    Stress granules  0.997894     0.979976     0.998358 0.939227 0.999481 0.959169                     NaN
    TDP-43 granules  0.996821     0.941386     0.998024 0.911808 0.998727 0.926361                     NaN
      TIA1 granules  0.995938     0.927283     0.997566 0.900374 0.998274 0.913631                     NaN
Transport machinery  0.991034     0.898715     0.993291 0.766125 0.997513 0.827140                     NaN
      hnRNP complex  0.997837     0.931789     0.999446 0.976180 0.998340 0.953468                     NaN
      Macro Average  0.995644     0.901550     0.997738 0.904388 0.997764 0.901161                 0.94337

=== Average Metrics Across Folds ===
              Label  Accuracy  Sensitivity  Specificity      PPV      NPV       F1  Correct/Total Accuracy
    ANXA11 granules  0.996767     0.940568     0.998037 0.916155 0.998656 0.928016                     NaN
 Actin Cytoskeleton  0.997707     0.952476     0.998735 0.943452 0.998923 0.947070                     NaN
     Autophagosomes  0.985266     0.470054     0.995573 0.675980 0.989465 0.553674                     NaN
    Coated vesicles  0.995611     0.851638     0.999089 0.959101 0.996431 0.899480                     NaN
                 ER  0.997522     0.964597     0.998372 0.938966 0.999086 0.951369                     NaN
      FMRP granules  0.988138     0.650459     0.996597 0.815849 0.991315 0.714377                     NaN
              Golgi  0.998901     0.984617     0.999230 0.966797 0.999645 0.975599                     NaN
           Lysosome  0.997496     0.946748     0.998835 0.955437 0.998596 0.951066                     NaN
                MOM  0.998913     0.983575     0.999272 0.968870 0.999615 0.976154                     NaN
        Microtubule  0.994770     0.991825     0.995484 0.981446 0.998026 0.986581                     NaN
       Mitochondria  0.999068     0.979832     0.999537 0.981033 0.999508 0.980411                     NaN
      NEMO granules  0.996675     0.952212     0.997820 0.920796 0.998770 0.934740                     NaN
          Nucleolus  0.999384     0.989047     0.999634 0.985068 0.999735 0.987023                     NaN
            Nucleus  0.999645     1.000000     0.999520 0.998648 1.000000 0.999323                     NaN
           P-Bodies  0.990606     0.851379     0.993791 0.765667 0.996593 0.803571                     NaN
         PML bodies  0.993166     0.786001     0.997234 0.854321 0.995805 0.816129                     NaN
      PURA granules  0.998096     0.974372     0.998657 0.946800 0.999394 0.959818                     NaN
       Paraspeckles  0.998080     0.954905     0.999008 0.954009 0.999030 0.954455                     NaN
         Peroxisome  0.997535     0.946717     0.998659 0.941472 0.998821 0.943609                     NaN
        Postsynapse  0.989235     0.714254     0.994472 0.735050 0.994561 0.713113                     NaN
         Presynapse  0.994944     0.903256     0.996910 0.869383 0.997928 0.882166                     NaN
    Stress granules  0.997928     0.979896     0.998399 0.941434 0.999476 0.959787                     NaN
    TDP-43 granules  0.996836     0.940923     0.998047 0.913616 0.998720 0.926785                     NaN
      TIA1 granules  0.995924     0.928344     0.997524 0.900697 0.998303 0.913574                     NaN
Transport machinery  0.991283     0.897567     0.993601 0.810543 0.997469 0.839943                     NaN
      hnRNP complex  0.997841     0.931636     0.999454 0.976631 0.998336 0.953424                     NaN
      Macro Average  0.995667     0.902573     0.997750 0.908355 0.997777 0.901971                0.943669
Plotting confusion matrix with 26 classes, fig_size=46.8, font_size=23
Out[11]:
{'Accuracy': 0.9956668237232452,
 'Sensitivity': 0.9025730353738828,
 'Specificity': 0.9977496228381414,
 'PPV': 0.9083546275272392,
 'NPV': 0.9977771596059583,
 'F1': 0.9019713749781145,
 'Correct/Total Accuracy': 0.9436687084021894}

Cytoself

In [6]:
# Dataset settings for the Cytoself run: a different embeddings folder, with the
# same multiplexed flag and per-batch config template / directory values.
Cytoself_dataset_config = dict(
    path_to_embeddings="/home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/",
    multiplexed=False,
    config_fmt="NIH_UMAP1_DatasetConfig_B{batch}",
    config_dir="manuscript/manuscript_figures_data_config",
)
In [7]:
# Same baseline evaluation, now on the Cytoself embeddings. Kept as the cell's
# last expression so the returned metrics are displayed as the cell output.
run_baseline_model(dataset_config=Cytoself_dataset_config, **common)
2025-09-17 11:27:35 INFO: [load_embeddings] multiplex=False
2025-09-17 11:27:35 INFO: [load_embeddings] experiment_type = NIH
2025-09-17 11:27:35 INFO: [load_embeddings] input_folders = ['batch1']
2025-09-17 11:27:35 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
Loading all batches...
2025-09-17 11:27:51 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-09-17 11:27:54 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-09-17 11:27:55 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-09-17 11:27:56 INFO: [load_embeddings] embeddings shape: (115590, 2048)
2025-09-17 11:27:56 INFO: [load_embeddings] labels shape: (115590,)
2025-09-17 11:27:56 INFO: [load_embeddings] example label: TIA1_WT_Untreated
2025-09-17 11:27:56 INFO: [load_embeddings] paths shape: (115590,)
2025-09-17 11:27:57 INFO: [load_embeddings] multiplex=False
2025-09-17 11:27:57 INFO: [load_embeddings] experiment_type = NIH
2025-09-17 11:27:57 INFO: [load_embeddings] input_folders = ['batch2']
2025-09-17 11:27:57 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
2025-09-17 11:28:08 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-09-17 11:28:10 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-09-17 11:28:11 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-09-17 11:28:12 INFO: [load_embeddings] embeddings shape: (94059, 2048)
2025-09-17 11:28:12 INFO: [load_embeddings] labels shape: (94059,)
2025-09-17 11:28:12 INFO: [load_embeddings] example label: TIA1_WT_Untreated
2025-09-17 11:28:12 INFO: [load_embeddings] paths shape: (94059,)
2025-09-17 11:28:12 INFO: [load_embeddings] multiplex=False
2025-09-17 11:28:12 INFO: [load_embeddings] experiment_type = NIH
2025-09-17 11:28:12 INFO: [load_embeddings] input_folders = ['batch3']
2025-09-17 11:28:12 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
2025-09-17 11:28:25 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-09-17 11:28:27 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-09-17 11:28:28 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-09-17 11:28:28 INFO: [load_embeddings] embeddings shape: (87130, 2048)
2025-09-17 11:28:28 INFO: [load_embeddings] labels shape: (87130,)
2025-09-17 11:28:28 INFO: [load_embeddings] example label: TIA1_WT_Untreated
2025-09-17 11:28:28 INFO: [load_embeddings] paths shape: (87130,)
Batches loaded.
Training on Batches: [1], Testing on: [2, 3].

=== Fold (test=[2, 3]) ===
Train: (115590, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25]
Test: (181189, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25]
TIA1 granules: 2712
ANXA11 granules: 2614
Coated vesicles: 2439
ER: 3056
Nucleus: 30429
P-Bodies: 2364
FMRP granules: 2913
hnRNP complex: 2728
Stress granules: 2842
Golgi: 2371
Transport machinery: 2622
Lysosome: 3067
Mitochondria: 2728
Nucleolus: 2709
NEMO granules: 2935
Paraspeckles: 2623
Peroxisome: 2505
PML bodies: 2297
Postsynapse: 2101
PURA granules: 2712
Actin Cytoskeleton: 2219
Presynapse: 2454
Autophagosomes: 2651
TDP-43 granules: 2535
MOM: 2363
Microtubule: 22601
/home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/sklearn/svm/_classes.py:32: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning.
  warnings.warn(
/home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/sklearn/svm/_base.py:1242: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
=== Evaluation Metrics ===
              Label  Accuracy  Sensitivity  Specificity      PPV      NPV       F1  Correct/Total Accuracy
    ANXA11 granules  0.992030     0.837402     0.995497 0.806545 0.996352 0.821684                     NaN
 Actin Cytoskeleton  0.996683     0.889954     0.999198 0.963155 0.997412 0.925109                     NaN
     Autophagosomes  0.982057     0.420331     0.991957 0.479462 0.989806 0.447954                     NaN
    Coated vesicles  0.993785     0.782751     0.999258 0.964747 0.994394 0.864272                     NaN
                 ER  0.997312     0.950805     0.998473 0.939516 0.998772 0.945127                     NaN
      FMRP granules  0.983498     0.754390     0.988802 0.609338 0.994282 0.674150                     NaN
              Golgi  0.996578     0.920094     0.998417 0.933254 0.998079 0.926627                     NaN
           Lysosome  0.996197     0.947414     0.997463 0.906472 0.998634 0.926491                     NaN
                MOM  0.997770     0.949235     0.998954 0.956776 0.998762 0.952990                     NaN
        Microtubule  0.994829     0.993000     0.995266 0.980478 0.998319 0.986699                     NaN
       Mitochondria  0.998228     0.961903     0.999118 0.963906 0.999067 0.962903                     NaN
      NEMO granules  0.994983     0.890328     0.997635 0.905086 0.997223 0.897647                     NaN
          Nucleolus  0.998620     0.983766     0.998982 0.959294 0.999604 0.971376                     NaN
            Nucleus  0.999305     0.999768     0.999140 0.997580 0.999918 0.998673                     NaN
           P-Bodies  0.987554     0.680806     0.994869 0.759852 0.992407 0.718160                     NaN
         PML bodies  0.988570     0.601569     0.996062 0.747292 0.992316 0.666559                     NaN
      PURA granules  0.995469     0.971976     0.996023 0.852163 0.999337 0.908135                     NaN
       Paraspeckles  0.996280     0.901282     0.998242 0.913741 0.997961 0.907468                     NaN
         Peroxisome  0.996037     0.859252     0.999069 0.953403 0.996887 0.903882                     NaN
        Postsynapse  0.984199     0.609791     0.991647 0.592196 0.992233 0.600864                     NaN
         Presynapse  0.990711     0.892593     0.992802 0.725435 0.997700 0.800380                     NaN
    Stress granules  0.996208     0.957195     0.997236 0.901175 0.998871 0.928340                     NaN
    TDP-43 granules  0.994812     0.885411     0.997137 0.867915 0.997564 0.876576                     NaN
      TIA1 granules  0.992069     0.870077     0.994939 0.801726 0.996938 0.834504                     NaN
Transport machinery  0.988520     0.681533     0.996271 0.821892 0.991994 0.745160                     NaN
      hnRNP complex  0.995695     0.877857     0.998581 0.938071 0.997014 0.906966                     NaN
      Macro Average  0.993385     0.848865     0.996578 0.855403 0.996609 0.849950                0.914001
Plotting confusion matrix with 26 classes, fig_size=46.8, font_size=23
Training on Batches: [2], Testing on: [1, 3].

=== Fold (test=[1, 3]) ===
Train: (94059, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25]
Test: (202720, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25]
TIA1 granules: 2086
ANXA11 granules: 2123
Coated vesicles: 2536
ER: 2079
Nucleus: 24823
P-Bodies: 2319
FMRP granules: 2608
hnRNP complex: 2236
Stress granules: 2265
Golgi: 2110
Transport machinery: 2104
Lysosome: 2243
Mitochondria: 2236
Nucleolus: 2227
NEMO granules: 2360
Paraspeckles: 1916
Peroxisome: 2074
PML bodies: 1818
Postsynapse: 1631
PURA granules: 2090
Actin Cytoskeleton: 2019
Presynapse: 1923
Autophagosomes: 1654
TDP-43 granules: 1934
MOM: 2114
Microtubule: 18531
/home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/sklearn/svm/_classes.py:32: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning.
  warnings.warn(
/home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/sklearn/svm/_base.py:1242: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
=== Evaluation Metrics ===
              Label  Accuracy  Sensitivity  Specificity      PPV      NPV       F1  Correct/Total Accuracy
    ANXA11 granules  0.992847     0.831541     0.996479 0.841723 0.996208 0.836601                     NaN
 Actin Cytoskeleton  0.996902     0.950126     0.997933 0.910147 0.998900 0.929707                     NaN
     Autophagosomes  0.980663     0.448609     0.991742 0.530758 0.988556 0.486239                     NaN
    Coated vesicles  0.994061     0.813295     0.998149 0.908547 0.995788 0.858286                     NaN
                 ER  0.997731     0.967706     0.998551 0.948000 0.999118 0.957752                     NaN
      FMRP granules  0.981802     0.579796     0.990732 0.581512 0.990667 0.580652                     NaN
              Golgi  0.995309     0.916740     0.997099 0.878049 0.998101 0.896978                     NaN
           Lysosome  0.996014     0.932125     0.997765 0.919540 0.998139 0.925790                     NaN
                MOM  0.997869     0.962744     0.998678 0.943716 0.999142 0.953135                     NaN
        Microtubule  0.995116     0.992577     0.995723 0.982270 0.998223 0.987397                     NaN
       Mitochondria  0.998214     0.961227     0.999116 0.963625 0.999055 0.962425                     NaN
      NEMO granules  0.992413     0.958036     0.993292 0.784950 0.998921 0.862899                     NaN
          Nucleolus  0.997928     0.965999     0.998702 0.947422 0.999176 0.956621                     NaN
            Nucleus  0.999477     0.999227     0.999566 0.998775 0.999726 0.999001                     NaN
           P-Bodies  0.987564     0.695193     0.993847 0.708313 0.993452 0.701692                     NaN
         PML bodies  0.987826     0.638520     0.994713 0.704277 0.992885 0.669789                     NaN
      PURA granules  0.997425     0.948718     0.998606 0.942822 0.998757 0.945761                     NaN
       Paraspeckles  0.996295     0.911980     0.998155 0.915959 0.998059 0.913965                     NaN
         Peroxisome  0.995422     0.851147     0.998593 0.930075 0.996734 0.888862                     NaN
        Postsynapse  0.983618     0.557193     0.992210 0.590368 0.991088 0.573301                     NaN
         Presynapse  0.991180     0.792160     0.995504 0.792895 0.995484 0.792527                     NaN
    Stress granules  0.997109     0.951588     0.998314 0.937241 0.998718 0.944360                     NaN
    TDP-43 granules  0.994771     0.881720     0.997262 0.876507 0.997393 0.879106                     NaN
      TIA1 granules  0.992852     0.880167     0.995579 0.828128 0.997096 0.853355                     NaN
Transport machinery  0.987885     0.712851     0.994811 0.775787 0.992783 0.742989                     NaN
      hnRNP complex  0.997139     0.930334     0.998767 0.948425 0.998303 0.939292                     NaN
      Macro Average  0.993286     0.847359     0.996534 0.849609 0.996557 0.847634                0.912717
Plotting confusion matrix with 26 classes, fig_size=46.8, font_size=23
Training on Batches: [3], Testing on: [1, 2].

=== Fold (test=[1, 2]) ===
Train: (87130, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25]
Test: (209649, 2048) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25]
TIA1 granules: 2078
ANXA11 granules: 1850
Coated vesicles: 2044
ER: 2332
Nucleus: 22599
P-Bodies: 1901
FMRP granules: 1492
hnRNP complex: 2095
Stress granules: 2384
Golgi: 2145
Transport machinery: 2358
Lysosome: 2340
Mitochondria: 2095
Nucleolus: 2085
NEMO granules: 2117
Paraspeckles: 1751
Peroxisome: 1855
PML bodies: 1623
Postsynapse: 1903
PURA granules: 2085
Actin Cytoskeleton: 2152
Presynapse: 1857
Autophagosomes: 1484
TDP-43 granules: 1836
MOM: 2200
Microtubule: 16469
/home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/sklearn/svm/_classes.py:32: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning.
  warnings.warn(
/home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/sklearn/svm/_base.py:1242: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
=== Evaluation Metrics ===
              Label  Accuracy  Sensitivity  Specificity      PPV      NPV       F1  Correct/Total Accuracy
    ANXA11 granules  0.992053     0.851805     0.995296 0.807161 0.996570 0.828882                     NaN
 Actin Cytoskeleton  0.995640     0.947853     0.996626 0.852866 0.998922 0.897854                     NaN
     Autophagosomes  0.973747     0.291289     0.988054 0.338279 0.985185 0.313030                     NaN
    Coated vesicles  0.994963     0.875980     0.997855 0.908484 0.996988 0.891936                     NaN
                 ER  0.997310     0.966894     0.998073 0.926479 0.999168 0.946255                     NaN
      FMRP granules  0.974042     0.270965     0.993058 0.513560 0.980531 0.354755                     NaN
              Golgi  0.995015     0.937068     0.996281 0.846231 0.998622 0.889336                     NaN
           Lysosome  0.996256     0.919586     0.998248 0.931692 0.997911 0.925599                     NaN
                MOM  0.998455     0.960688     0.999279 0.966734 0.999142 0.963702                     NaN
        Microtubule  0.996995     0.988671     0.999027 0.995983 0.997240 0.992314                     NaN
       Mitochondria  0.998626     0.984287     0.998974 0.958791 0.999619 0.971372                     NaN
      NEMO granules  0.991925     0.790557     0.997142 0.877568 0.994587 0.831793                     NaN
          Nucleolus  0.997987     0.975891     0.998520 0.940820 0.999418 0.958035                     NaN
            Nucleus  0.999227     0.999023     0.999301 0.998047 0.999650 0.998535                     NaN
           P-Bodies  0.980568     0.708947     0.986773 0.550489 0.993306 0.619750                     NaN
         PML bodies  0.982475     0.678979     0.988552 0.542840 0.993540 0.603325                     NaN
      PURA granules  0.997262     0.906706     0.999385 0.971875 0.997816 0.938160                     NaN
       Paraspeckles  0.996342     0.934347     0.997713 0.900425 0.998546 0.917072                     NaN
         Peroxisome  0.995340     0.936886     0.996645 0.861792 0.998588 0.897771                     NaN
        Postsynapse  0.979990     0.396034     0.990574 0.432290 0.989070 0.413369                     NaN
         Presynapse  0.989826     0.664382     0.996765 0.814110 0.992872 0.731664                     NaN
    Stress granules  0.996699     0.934600     0.998250 0.930228 0.998367 0.932409                     NaN
    TDP-43 granules  0.993274     0.853435     0.996320 0.834756 0.996806 0.843992                     NaN
      TIA1 granules  0.992030     0.773030     0.997159 0.864367 0.994697 0.816151                     NaN
Transport machinery  0.979976     0.759204     0.985068 0.539711 0.994394 0.630913                     NaN
      hnRNP complex  0.995297     0.845286     0.998935 0.950612 0.996258 0.894860                     NaN
      Macro Average  0.991589     0.813554     0.995687 0.809854 0.995685 0.807801                 0.89066
Plotting confusion matrix with 26 classes, fig_size=46.8, font_size=23
=== Evaluation Metrics (from aggregated confusion) ===
              Label  Accuracy  Sensitivity  Specificity      PPV      NPV       F1  Correct/Total Accuracy
    ANXA11 granules  0.992318     0.840595     0.995761 0.818236 0.996379 0.829265                     NaN
 Actin Cytoskeleton  0.996390     0.929734     0.997856 0.905157 0.998453 0.917281                     NaN
     Autophagosomes  0.978646     0.382449     0.990507 0.444891 0.987749 0.411314                     NaN
    Coated vesicles  0.994295     0.825545     0.998383 0.925196 0.995785 0.872534                     NaN
                 ER  0.997454     0.962435     0.998358 0.938002 0.999030 0.950061                     NaN
      FMRP granules  0.979579     0.509269     0.990962 0.576932 0.988157 0.540993                     NaN
              Golgi  0.995593     0.924691     0.997212 0.883362 0.998278 0.903554                     NaN
           Lysosome  0.996155     0.932353     0.997844 0.919611 0.998209 0.925938                     NaN
                MOM  0.998046     0.957691     0.998974 0.955544 0.999026 0.956616                     NaN
        Microtubule  0.995692     0.991311     0.996747 0.986558 0.997905 0.988929                     NaN
       Mitochondria  0.998364     0.969542     0.999066 0.961979 0.999258 0.965746                     NaN
      NEMO granules  0.993025     0.877766     0.995977 0.848240 0.996866 0.862750                     NaN
          Nucleolus  0.998160     0.974932     0.998723 0.948718 0.999392 0.961647                     NaN
            Nucleus  0.999336     0.999319     0.999342 0.998152 0.999758 0.998736                     NaN
           P-Bodies  0.985090     0.695474     0.991661 0.654236 0.993081 0.674225                     NaN
         PML bodies  0.986163     0.641948     0.992949 0.642228 0.992941 0.642088                     NaN
      PURA granules  0.996770     0.941121     0.998092 0.921387 0.998600 0.931150                     NaN
       Paraspeckles  0.996307     0.916932     0.998026 0.909557 0.998201 0.913229                     NaN
         Peroxisome  0.995581     0.884131     0.998051 0.909505 0.997434 0.896639                     NaN
        Postsynapse  0.982514     0.520319     0.991460 0.541109 0.990723 0.530511                     NaN
         Presynapse  0.990559     0.777751     0.995125 0.773903 0.995231 0.775822                     NaN
    Stress granules  0.996689     0.947537     0.997962 0.923317 0.998641 0.935270                     NaN
    TDP-43 granules  0.994255     0.872799     0.996891 0.859038 0.997238 0.865864                     NaN
      TIA1 granules  0.992323     0.839732     0.995942 0.830732 0.996198 0.835208                     NaN
Transport machinery  0.985285     0.718450     0.991810 0.682056 0.993106 0.699780                     NaN
      hnRNP complex  0.996048     0.884332     0.998770 0.945977 0.997186 0.914116                     NaN
      Macro Average  0.992717     0.835314     0.996248 0.834755 0.996263 0.834587                0.905318

=== Average Metrics Across Folds ===
              Label  Accuracy  Sensitivity  Specificity      PPV      NPV       F1  Correct/Total Accuracy
    ANXA11 granules  0.992310     0.840250     0.995757 0.818477 0.996376 0.829056                     NaN
 Actin Cytoskeleton  0.996408     0.929311     0.997919 0.908723 0.998411 0.917557                     NaN
     Autophagosomes  0.978822     0.386743     0.990584 0.449500 0.987849 0.415741                     NaN
    Coated vesicles  0.994270     0.824009     0.998421 0.927260 0.995723 0.871498                     NaN
                 ER  0.997451     0.961802     0.998366 0.937998 0.999019 0.949711                     NaN
      FMRP granules  0.979781     0.535050     0.990864 0.568137 0.988493 0.536519                     NaN
              Golgi  0.995634     0.924634     0.997266 0.885845 0.998267 0.904314                     NaN
           Lysosome  0.996156     0.933042     0.997825 0.919235 0.998228 0.925960                     NaN
                MOM  0.998031     0.957556     0.998970 0.955742 0.999015 0.956609                     NaN
        Microtubule  0.995647     0.991416     0.996672 0.986244 0.997927 0.988803                     NaN
       Mitochondria  0.998356     0.969139     0.999069 0.962107 0.999247 0.965567                     NaN
      NEMO granules  0.993107     0.879641     0.996023 0.855868 0.996910 0.864113                     NaN
          Nucleolus  0.998179     0.975219     0.998735 0.949179 0.999399 0.962011                     NaN
            Nucleus  0.999336     0.999339     0.999336 0.998134 0.999765 0.998736                     NaN
           P-Bodies  0.985229     0.694982     0.991830 0.672885 0.993055 0.679867                     NaN
         PML bodies  0.986290     0.639690     0.993109 0.664803 0.992914 0.646558                     NaN
      PURA granules  0.996719     0.942467     0.998004 0.922287 0.998637 0.930685                     NaN
       Paraspeckles  0.996306     0.915869     0.998037 0.910041 0.998189 0.912835                     NaN
         Peroxisome  0.995600     0.882428     0.998103 0.915090 0.997403 0.896839                     NaN
        Postsynapse  0.982602     0.521006     0.991477 0.538285 0.990797 0.529178                     NaN
         Presynapse  0.990572     0.783045     0.995024 0.777480 0.995352 0.774857                     NaN
    Stress granules  0.996672     0.947794     0.997933 0.922881 0.998652 0.935036                     NaN
    TDP-43 granules  0.994286     0.873522     0.996906 0.859726 0.997254 0.866558                     NaN
      TIA1 granules  0.992317     0.841091     0.995892 0.831407 0.996243 0.834670                     NaN
Transport machinery  0.985460     0.717863     0.992050 0.712463 0.993057 0.706354                     NaN
      hnRNP complex  0.996044     0.884492     0.998761 0.945703 0.997191 0.913706                     NaN
      Macro Average  0.992753     0.836592     0.996267 0.838288 0.996284 0.835128                0.905793
Plotting confusion matrix with 26 classes, fig_size=46.8, font_size=23
Out[7]:
{'Accuracy': 0.992753296238766,
 'Sensitivity': 0.8365922757073809,
 'Specificity': 0.9962666530158648,
 'PPV': 0.8382883760889336,
 'NPV': 0.9962837269271777,
 'F1': 0.8351283651750085,
 'Correct/Total Accuracy': 0.9057928511039575}

Pretrained Model¶

In [8]:
# Dataset settings for the pretrained-model run: same config template and
# config directory as the other runs in this notebook, only the embeddings
# folder differs.
# NOTE(review): "{batch}" is presumably substituted with each batch number by
# run_baseline_model -- confirm against its implementation.
pretrained_dataset_config = dict(
    path_to_embeddings="/home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/pretrained_model",
    multiplexed=False,
    config_fmt="NIH_UMAP1_DatasetConfig_B{batch}",
    config_dir="manuscript/manuscript_figures_data_config",
)
In [9]:
# Run the baseline classifier on the pretrained-model embeddings, reusing the
# shared keyword arguments collected in `common`.
run_baseline_model(dataset_config=pretrained_dataset_config, **common)
2025-09-17 11:34:26 INFO: [load_embeddings] multiplex=False
2025-09-17 11:34:26 INFO: [load_embeddings] experiment_type = NIH
2025-09-17 11:34:26 INFO: [load_embeddings] input_folders = ['batch1']
2025-09-17 11:34:26 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/pretrained_model
Loading all batches...
2025-09-17 11:34:30 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-09-17 11:34:31 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-09-17 11:34:32 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-09-17 11:34:32 INFO: [load_embeddings] embeddings shape: (115590, 192)
2025-09-17 11:34:32 INFO: [load_embeddings] labels shape: (115590,)
2025-09-17 11:34:32 INFO: [load_embeddings] example label: CLTC_WT_Untreated
2025-09-17 11:34:32 INFO: [load_embeddings] paths shape: (115590,)
2025-09-17 11:34:32 INFO: [load_embeddings] multiplex=False
2025-09-17 11:34:32 INFO: [load_embeddings] experiment_type = NIH
2025-09-17 11:34:32 INFO: [load_embeddings] input_folders = ['batch2']
2025-09-17 11:34:32 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/pretrained_model
2025-09-17 11:34:35 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-09-17 11:34:35 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-09-17 11:34:36 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-09-17 11:34:36 INFO: [load_embeddings] embeddings shape: (94059, 192)
2025-09-17 11:34:36 INFO: [load_embeddings] labels shape: (94059,)
2025-09-17 11:34:36 INFO: [load_embeddings] example label: DAPI_WT_Untreated
2025-09-17 11:34:36 INFO: [load_embeddings] paths shape: (94059,)
2025-09-17 11:34:36 INFO: [load_embeddings] multiplex=False
2025-09-17 11:34:36 INFO: [load_embeddings] experiment_type = NIH
2025-09-17 11:34:36 INFO: [load_embeddings] input_folders = ['batch3']
2025-09-17 11:34:36 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/pretrained_model
2025-09-17 11:34:39 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41']
2025-09-17 11:34:40 INFO: [embeddings_utils._filter] cell_lines = ['WT']
2025-09-17 11:34:40 INFO: [embeddings_utils._filter] conditions = ['Untreated']
2025-09-17 11:34:40 INFO: [load_embeddings] embeddings shape: (87130, 192)
2025-09-17 11:34:40 INFO: [load_embeddings] labels shape: (87130,)
2025-09-17 11:34:40 INFO: [load_embeddings] example label: MitoTracker_WT_Untreated
2025-09-17 11:34:40 INFO: [load_embeddings] paths shape: (87130,)
Batches loaded.
Training on Batches: [1], Testing on: [2, 3].

=== Fold (test=[2, 3]) ===
Train: (115590, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25]
Test: (181189, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25]
Coated vesicles: 2439
Nucleus: 30429
Golgi: 2371
PURA granules: 2712
Postsynapse: 2101
Autophagosomes: 2651
Presynapse: 2454
Stress granules: 2842
Peroxisome: 2505
Transport machinery: 2622
NEMO granules: 2935
Mitochondria: 2728
TIA1 granules: 2712
Nucleolus: 2709
Microtubule: 22601
Paraspeckles: 2623
ANXA11 granules: 2614
P-Bodies: 2364
MOM: 2363
FMRP granules: 2913
hnRNP complex: 2728
PML bodies: 2297
ER: 3056
TDP-43 granules: 2535
Actin Cytoskeleton: 2219
Lysosome: 3067
/home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/sklearn/svm/_classes.py:32: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning.
  warnings.warn(
=== Evaluation Metrics ===
              Label  Accuracy  Sensitivity  Specificity      PPV      NPV       F1  Correct/Total Accuracy
    ANXA11 granules  0.996313     0.926001     0.997890 0.907723 0.998340 0.916770                     NaN
 Actin Cytoskeleton  0.998256     0.939343     0.999644 0.984175 0.998572 0.961237                     NaN
     Autophagosomes  0.990756     0.678776     0.996254 0.761530 0.994350 0.717776                     NaN
    Coated vesicles  0.993940     0.776856     0.999570 0.979086 0.994244 0.866326                     NaN
                 ER  0.998587     0.968488     0.999338 0.973342 0.999214 0.970909                     NaN
      FMRP granules  0.988040     0.708293     0.994517 0.749419 0.993255 0.728276                     NaN
              Golgi  0.998306     0.976028     0.998841 0.952960 0.999423 0.964356                     NaN
           Lysosome  0.998444     0.971634     0.999139 0.966992 0.999264 0.969308                     NaN
                MOM  0.999018     0.986555     0.999322 0.972578 0.999672 0.979517                     NaN
        Microtubule  0.994244     0.991829     0.994822 0.978659 0.998037 0.985200                     NaN
       Mitochondria  0.998377     0.972062     0.999022 0.960529 0.999316 0.966261                     NaN
      NEMO granules  0.996407     0.961358     0.997295 0.900042 0.999019 0.929690                     NaN
          Nucleolus  0.999393     0.991187     0.999593 0.983433 0.999785 0.987295                     NaN
            Nucleus  0.999404     0.999937     0.999215 0.997791 0.999978 0.998863                     NaN
           P-Bodies  0.991407     0.763507     0.996841 0.852156 0.994375 0.805399                     NaN
         PML bodies  0.991749     0.727114     0.996872 0.818182 0.994729 0.769965                     NaN
      PURA granules  0.997583     0.982275     0.997944 0.918477 0.999581 0.949306                     NaN
       Paraspeckles  0.997351     0.923643     0.998873 0.944243 0.998423 0.933830                     NaN
         Peroxisome  0.998273     0.946806     0.999413 0.972803 0.998822 0.959629                     NaN
        Postsynapse  0.990877     0.796265     0.994748 0.751001 0.995942 0.772971                     NaN
         Presynapse  0.993079     0.945238     0.994098 0.773377 0.998828 0.850714                     NaN
    Stress granules  0.996275     0.978920     0.996732 0.887480 0.999443 0.930960                     NaN
    TDP-43 granules  0.996396     0.929178     0.997824 0.900746 0.998494 0.914741                     NaN
      TIA1 granules  0.995982     0.950048     0.997063 0.883825 0.998823 0.915741                     NaN
Transport machinery  0.994851     0.843568     0.998670 0.941235 0.996061 0.889729                     NaN
      hnRNP complex  0.997494     0.914108     0.999536 0.979708 0.997900 0.945772                     NaN
      Macro Average  0.995800     0.905731     0.997811 0.911211 0.997842 0.906944                  0.9454
Plotting confusion matrix with 26 classes, fig_size=46.8, font_size=23
Training on Batches: [2], Testing on: [1, 3].

=== Fold (test=[1, 3]) ===
Train: (94059, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25]
Test: (202720, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25]
Nucleus: 24823
Stress granules: 2265
TDP-43 granules: 1934
Nucleolus: 2227
NEMO granules: 2360
Microtubule: 18531
Peroxisome: 2074
PML bodies: 1818
Transport machinery: 2104
FMRP granules: 2608
TIA1 granules: 2086
Paraspeckles: 1916
ER: 2079
Mitochondria: 2236
Coated vesicles: 2536
Presynapse: 1923
P-Bodies: 2319
Golgi: 2110
Lysosome: 2243
hnRNP complex: 2236
Postsynapse: 1631
MOM: 2114
Actin Cytoskeleton: 2019
Autophagosomes: 1654
ANXA11 granules: 2123
PURA granules: 2090
/home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/sklearn/svm/_classes.py:32: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning.
  warnings.warn(
=== Evaluation Metrics ===
              Label  Accuracy  Sensitivity  Specificity      PPV      NPV       F1  Correct/Total Accuracy
    ANXA11 granules  0.996734     0.925403     0.998341 0.926233 0.998320 0.925818                     NaN
 Actin Cytoskeleton  0.997978     0.974834     0.998488 0.934225 0.999445 0.954098                     NaN
     Autophagosomes  0.989873     0.615478     0.997669 0.846077 0.992039 0.712586                     NaN
    Coated vesicles  0.994707     0.803480     0.999031 0.949394 0.995571 0.870364                     NaN
                 ER  0.998426     0.966592     0.999296 0.974004 0.999088 0.970284                     NaN
      FMRP granules  0.987446     0.633598     0.995305 0.749866 0.991889 0.686846                     NaN
              Golgi  0.998259     0.983835     0.998587 0.940716 0.999631 0.961792                     NaN
           Lysosome  0.997884     0.955798     0.999037 0.964539 0.998789 0.960149                     NaN
                MOM  0.999048     0.990357     0.999248 0.968081 0.999778 0.979092                     NaN
        Microtubule  0.995023     0.991067     0.995967 0.983241 0.997863 0.987138                     NaN
       Mitochondria  0.998668     0.967862     0.999419 0.975957 0.999217 0.971893                     NaN
      NEMO granules  0.995141     0.978029     0.995578 0.849699 0.999436 0.909359                     NaN
          Nucleolus  0.999438     0.988319     0.999707 0.987907 0.999717 0.988113                     NaN
            Nucleus  0.999714     0.999906     0.999646 0.999001 0.999967 0.999453                     NaN
           P-Bodies  0.991427     0.766002     0.996271 0.815323 0.994978 0.789894                     NaN
         PML bodies  0.990997     0.763010     0.995493 0.769488 0.995328 0.766235                     NaN
      PURA granules  0.998446     0.971857     0.999091 0.962825 0.999318 0.967320                     NaN
       Paraspeckles  0.997879     0.953589     0.998856 0.948386 0.998976 0.950980                     NaN
         Peroxisome  0.997805     0.942661     0.999017 0.954704 0.998740 0.948644                     NaN
        Postsynapse  0.990973     0.803946     0.994741 0.754925 0.996044 0.778665                     NaN
         Presynapse  0.994016     0.913477     0.995766 0.824194 0.998116 0.866542                     NaN
    Stress granules  0.997208     0.974550     0.997808 0.921643 0.999326 0.947359                     NaN
    TDP-43 granules  0.997035     0.932052     0.998467 0.930562 0.998503 0.931306                     NaN
      TIA1 granules  0.995866     0.932359     0.997403 0.896787 0.998361 0.914227                     NaN
Transport machinery  0.994860     0.904418     0.997138 0.888363 0.997592 0.896318                     NaN
      hnRNP complex  0.998461     0.964545     0.999288 0.970582 0.999136 0.967554                     NaN
      Macro Average  0.995897     0.907578     0.997871 0.911028 0.997891 0.907770                0.946655
Plotting confusion matrix with 26 classes, fig_size=46.8, font_size=23
Training on Batches: [3], Testing on: [1, 2].

=== Fold (test=[1, 2]) ===
Train: (87130, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25]
Test: (209649, 192) Labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25]
Mitochondria: 2095
hnRNP complex: 2095
Nucleus: 22599
Peroxisome: 1855
Microtubule: 16469
Nucleolus: 2085
Actin Cytoskeleton: 2152
MOM: 2200
Golgi: 2145
Autophagosomes: 1484
Coated vesicles: 2044
TDP-43 granules: 1836
TIA1 granules: 2078
NEMO granules: 2117
Lysosome: 2340
PML bodies: 1623
Postsynapse: 1903
P-Bodies: 1901
ER: 2332
Stress granules: 2384
PURA granules: 2085
Transport machinery: 2358
ANXA11 granules: 1850
Presynapse: 1857
FMRP granules: 1492
Paraspeckles: 1751
/home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/sklearn/svm/_classes.py:32: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning.
  warnings.warn(
=== Evaluation Metrics ===
              Label  Accuracy  Sensitivity  Specificity      PPV      NPV       F1  Correct/Total Accuracy
    ANXA11 granules  0.995936     0.972556     0.996477 0.864515 0.999364 0.915359                     NaN
 Actin Cytoskeleton  0.997973     0.972393     0.998501 0.930458 0.999430 0.950963                     NaN
     Autophagosomes  0.981841     0.439257     0.993216 0.575822 0.988302 0.498353                     NaN
    Coated vesicles  0.997067     0.949749     0.998217 0.928291 0.998778 0.938897                     NaN
                 ER  0.997696     0.969815     0.998396 0.938206 0.999241 0.953749                     NaN
      FMRP granules  0.974257     0.160840     0.996257 0.537530 0.977726 0.247595                     NaN
              Golgi  0.998335     0.986387     0.998596 0.938828 0.999702 0.962020                     NaN
           Lysosome  0.997858     0.953861     0.999002 0.961283 0.998801 0.957557                     NaN
                MOM  0.999404     0.988162     0.999649 0.983986 0.999742 0.986069                     NaN
        Microtubule  0.997567     0.991831     0.998967 0.995753 0.998008 0.993788                     NaN
       Mitochondria  0.998655     0.969984     0.999350 0.973120 0.999272 0.971550                     NaN
      NEMO granules  0.995755     0.873654     0.998919 0.954405 0.996733 0.912246                     NaN
          Nucleolus  0.999680     0.992504     0.999853 0.993914 0.999819 0.993208                     NaN
            Nucleus  0.999919     1.000000     0.999890 0.999692 1.000000 0.999846                     NaN
           P-Bodies  0.979137     0.849883     0.982090 0.520193 0.996520 0.645371                     NaN
         PML bodies  0.990961     0.783961     0.995105 0.762287 0.995672 0.772972                     NaN
      PURA granules  0.998483     0.946481     0.999702 0.986756 0.998747 0.966199                     NaN
       Paraspeckles  0.997548     0.963648     0.998298 0.926106 0.999195 0.944504                     NaN
         Peroxisome  0.997176     0.977506     0.997615 0.901511 0.999497 0.937972                     NaN
        Postsynapse  0.990675     0.709271     0.995775 0.752630 0.994736 0.730308                     NaN
         Presynapse  0.994224     0.766507     0.999079 0.946670 0.995041 0.847115                     NaN
    Stress granules  0.997768     0.950460     0.998949 0.957585 0.998763 0.954009                     NaN
    TDP-43 granules  0.994076     0.905795     0.995999 0.831382 0.997944 0.866995                     NaN
      TIA1 granules  0.994853     0.843268     0.998404 0.925223 0.996337 0.882347                     NaN
Transport machinery  0.984417     0.912399     0.986078 0.601814 0.997955 0.725254                     NaN
      hnRNP complex  0.996313     0.866035     0.999472 0.975494 0.996760 0.917511                     NaN
      Macro Average  0.994137     0.872931     0.996994 0.871671 0.997003 0.864298                0.923787
Plotting confusion matrix with 26 classes, fig_size=46.8, font_size=23
=== Evaluation Metrics (from aggregated confusion) ===
              Label  Accuracy  Sensitivity  Specificity      PPV      NPV       F1  Correct/Total Accuracy
    ANXA11 granules  0.996324     0.942538     0.997545 0.897052 0.998694 0.919233                     NaN
 Actin Cytoskeleton  0.998061     0.962441     0.998845 0.948269 0.999173 0.955303                     NaN
     Autophagosomes  0.987305     0.567110     0.995665 0.722412 0.991425 0.635409                     NaN
    Coated vesicles  0.995306     0.846631     0.998908 0.949433 0.996295 0.895090                     NaN
                 ER  0.998218     0.968260     0.998991 0.961181 0.999181 0.964707                     NaN
      FMRP granules  0.982969     0.469343     0.995400 0.711753 0.987262 0.565671                     NaN
              Golgi  0.998300     0.982191     0.998668 0.943941 0.999593 0.962686                     NaN
           Lysosome  0.998046     0.959869     0.999056 0.964154 0.998938 0.962007                     NaN
                MOM  0.999164     0.988393     0.999412 0.974815 0.999733 0.981557                     NaN
        Microtubule  0.995684     0.991571     0.996674 0.986263 0.997967 0.988910                     NaN
       Mitochondria  0.998575     0.969897     0.999273 0.970171 0.999267 0.970034                     NaN
      NEMO granules  0.995744     0.935712     0.997282 0.898148 0.998352 0.916546                     NaN
          Nucleolus  0.999510     0.990671     0.999724 0.988629 0.999774 0.989649                     NaN
            Nucleus  0.999692     0.999949     0.999600 0.998877 0.999982 0.999413                     NaN
           P-Bodies  0.987080     0.795033     0.991437 0.678088 0.995331 0.731919                     NaN
         PML bodies  0.991214     0.759759     0.995777 0.780084 0.995266 0.769788                     NaN
      PURA granules  0.998196     0.966168     0.998957 0.956515 0.999196 0.961318                     NaN
       Paraspeckles  0.997601     0.948490     0.998664 0.938936 0.998884 0.943689                     NaN
         Peroxisome  0.997726     0.956326     0.998643 0.939820 0.999032 0.948001                     NaN
        Postsynapse  0.990838     0.770186     0.995109 0.752949 0.995550 0.761470                     NaN
         Presynapse  0.993803     0.871511     0.996427 0.839592 0.997241 0.855254                     NaN
    Stress granules  0.997121     0.967695     0.997883 0.922089 0.999162 0.944341                     NaN
    TDP-43 granules  0.995795     0.921887     0.997399 0.884973 0.998303 0.903053                     NaN
      TIA1 granules  0.995544     0.906632     0.997653 0.901584 0.997785 0.904101                     NaN
Transport machinery  0.991169     0.887916     0.993693 0.774917 0.997249 0.827577                     NaN
      hnRNP complex  0.997407     0.914435     0.999429 0.975002 0.997918 0.943748                     NaN
      Macro Average  0.995246     0.893870     0.997543 0.894602 0.997560 0.892326                0.938195

=== Average Metrics Across Folds ===
              Label  Accuracy  Sensitivity  Specificity      PPV      NPV       F1  Correct/Total Accuracy
    ANXA11 granules  0.996328     0.941320     0.997569 0.899490 0.998675 0.919316                     NaN
 Actin Cytoskeleton  0.998069     0.962190     0.998877 0.949619 0.999149 0.955433                     NaN
     Autophagosomes  0.987490     0.577837     0.995713 0.727810 0.991563 0.642905                     NaN
    Coated vesicles  0.995238     0.843361     0.998939 0.952257 0.996198 0.891862                     NaN
                 ER  0.998237     0.968298     0.999010 0.961851 0.999181 0.964981                     NaN
      FMRP granules  0.983248     0.500910     0.995360 0.678938 0.987623 0.554239                     NaN
              Golgi  0.998300     0.982083     0.998675 0.944168 0.999586 0.962723                     NaN
           Lysosome  0.998062     0.960431     0.999059 0.964271 0.998951 0.962338                     NaN
                MOM  0.999156     0.988358     0.999406 0.974881 0.999730 0.981559                     NaN
        Microtubule  0.995611     0.991576     0.996585 0.985884 0.997970 0.988709                     NaN
       Mitochondria  0.998567     0.969969     0.999264 0.969869 0.999268 0.969901                     NaN
      NEMO granules  0.995768     0.937680     0.997264 0.901382 0.998396 0.917098                     NaN
          Nucleolus  0.999504     0.990670     0.999718 0.988418 0.999774 0.989539                     NaN
            Nucleus  0.999679     0.999947     0.999584 0.998828 0.999981 0.999387                     NaN
           P-Bodies  0.987323     0.793131     0.991734 0.729224 0.995291 0.746888                     NaN
         PML bodies  0.991236     0.758029     0.995823 0.783319 0.995243 0.769724                     NaN
      PURA granules  0.998171     0.966871     0.998912 0.956020 0.999215 0.960942                     NaN
       Paraspeckles  0.997593     0.946960     0.998676 0.939578 0.998865 0.943105                     NaN
         Peroxisome  0.997751     0.955657     0.998682 0.943006 0.999019 0.948748                     NaN
        Postsynapse  0.990842     0.769827     0.995088 0.752852 0.995574 0.760648                     NaN
         Presynapse  0.993773     0.875074     0.996315 0.848080 0.997328 0.854791                     NaN
    Stress granules  0.997083     0.967977     0.997829 0.922236 0.999177 0.944109                     NaN
    TDP-43 granules  0.995836     0.922342     0.997430 0.887563 0.998314 0.904347                     NaN
      TIA1 granules  0.995567     0.908558     0.997623 0.901945 0.997840 0.904105                     NaN
Transport machinery  0.991376     0.886795     0.993962 0.810471 0.997203 0.837101                     NaN
      hnRNP complex  0.997423     0.914896     0.999432 0.975261 0.997932 0.943612                     NaN
      Macro Average  0.995278     0.895413     0.997559 0.897970 0.997579 0.893004                0.938614
Plotting confusion matrix with 26 classes, fig_size=46.8, font_size=23
Out[9]:
{'Accuracy': 0.9952779985005705,
 'Sensitivity': 0.8954134727257955,
 'Specificity': 0.9975588468110604,
 'PPV': 0.8979701162038985,
 'NPV': 0.9975787633190297,
 'F1': 0.8930041853910872,
 'Correct/Total Accuracy': 0.9386139805074171}
In [ ]: